package org.lionsoul.websnail.analyzer;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.lionsoul.websnail.downloader.Page;
import org.lionsoul.websnail.Spider;
import org.lionsoul.websnail.util.DefaultLinkExtractor;

/**
 * Default page analyzer.
 * <p>
 * For every page it tunes the page's link extractor (when that extractor is a
 * {@link DefaultLinkExtractor}), hands the page's links to the spider as new
 * request URLs, and prints the page URL and HTML title to standard output.
 *
 * @author chenxin,yangjian
 */
public class DefaultAnalyzer implements Analyzer {
    /**
     * Queues the links of {@code page} on {@code spider} and prints the page
     * URL followed by the document title.
     *
     * @param spider the spider that receives the page's links as request URLs
     * @param page   the downloaded page to analyze
     * @see Analyzer#process(Spider, Page)
     */
    @Override
    public void process(Spider spider, Page page) {
        // Configure the link extractor. The enable*Clear() switches exist only on
        // DefaultLinkExtractor, so check the runtime type instead of casting
        // blindly: a page carrying a different (or no) extractor must not abort
        // the analysis with a ClassCastException / NullPointerException here.
        Object extractor = page.getLinkExtractor();
        if (extractor instanceof DefaultLinkExtractor) {
            // NOTE(review): presumably these make the extractor discard external
            // and https links - confirm against DefaultLinkExtractor.
            ((DefaultLinkExtractor) extractor).enableExternalLinkClear().enableHttpsClear();
        }

        // Push the page's links to the spider as new request URLs. This is kept
        // after the extractor configuration above, as in the original ordering.
        spider.pushRequestUrl(page.getLinks());

        // Parse the raw HTML so the title can be read from the DOM.
        Document doc = Jsoup.parse(page.getHtml());

        System.out.println(page.getUrl());
        System.out.println(doc.title());
    }

}
